# -*- coding:utf-8 -*-

import requests
import re
import xlwt#用来创建excel文档并写入数据

# Fetch the page source.
# %25E9%25A1%25B9%25E7%259B%25AE%25E7%25BB%258F%25E7%2590%2586 is the double-URL-encoded keyword 项目经理 (project manager).
# def get_content(page):
#     url ='http://search.51job.com/list/000000,000000,0000,00,9,99,python,2,'+ str(page)+'.html'
#     a = urllib3.request.urlopen(url)#打开网址
#     html = a.read().decode('gbk')#读取源代码并转为unicode
#     return html
def get_content(page):
    """Fetch one page of 51job search results and return its HTML text.

    The search URL is fixed to region code 200200 (Xi'an, according to the
    original author's note) and a double-URL-encoded "project manager"
    keyword; only the page number varies.

    Args:
        page: Page number to request; it is converted with ``str`` and
            inserted into the URL.

    Returns:
        The decoded response body when the server answers with HTTP 200,
        otherwise an empty string (non-200 status, timeout, or any other
        ``requests`` error).
    """
    headers = {
        'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36'
    }
    url ='http://search.51job.com/list/200200,000000,0000,00,9,99,%25E9%25A1%25B9%25E7%259B%25AE%25E7%25BB%258F%25E7%2590%2586,2,'+ str(page)+'.html'

    try:
        # Without an explicit timeout requests waits indefinitely on a
        # stalled connection, which would hang the whole scrape.
        response = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException as exc:
        # Report the failure and fall back to the same empty result used for
        # non-200 answers, so one bad page does not abort the run.
        print('request for page %s failed: %s' % (page, exc))
        return ''

    if response.status_code != 200:
        return ''

    # Let requests guess the charset from the body instead of trusting the
    # HTTP header, then decode with it.
    response.encoding = response.apparent_encoding
    return response.text

def get(html):
    """Extract job records from a search-result page.

    Args:
        html: Page source as a string.

    Returns:
        A list of 5-tuples of strings, one per regex match, holding the
        captured contents of the ``t1`` title, ``t2`` title, ``t3``, ``t4``
        and ``t5`` elements in that order. Empty when nothing matches.
    """
    # re.S lets "." match newlines so one record may span several lines.
    pattern = re.compile(r'class="t1 ">.*? <a target="_blank" title="(.*?)".*? <span class="t2"><a target="_blank" title="(.*?)".*?<span class="t3">(.*?)</span>.*?<span class="t4">(.*?)</span>.*? <span class="t5">(.*?)</span>', re.S)
    return pattern.findall(html)
def excel_write(items,index):
    """Write scraped records into the module-level worksheet ``ws``.

    Each record is written to its own row, starting at row ``index`` and
    advancing by one per record; positions 0-4 of the record go to columns
    0-4. The row number is printed after each record is written.

    Args:
        items: Iterable of indexable records with at least five elements.
        index: Row number for the first record.
    """
    for row, record in enumerate(items, start=index):
        for col in range(5):
            ws.write(row, col, record[col])  # row, column, value
        print(row)


# Build the output workbook: a bold header row followed by one row per
# scraped job posting.
newTable = "test1.xls"  # output file name
wb = xlwt.Workbook(encoding='utf-8')  # create the workbook and declare its encoding
ws = wb.add_sheet('sheet1')  # worksheet that excel_write() fills in
# Header titles: position, company, address, salary, date.
headData = ['招聘职位','公司','地址','薪资','日期']
header_style = xlwt.easyxf('font: bold on')
for colnum, title in enumerate(headData):
    ws.write(0, colnum, title, header_style)  # row, column, value, style

# Scrape result pages 1-4 exactly once, after the header is complete. This
# loop used to be nested inside the header loop, which repeated the whole
# scrape for every header column and rewrote already-filled cells.
index = 1  # next free row; row 0 holds the header
for each in range(1, 5):
    items = get(get_content(each))
    excel_write(items, index)
    # Advance by the number of rows actually written rather than assuming
    # 50 results per page, so short pages leave no gaps and long pages do
    # not collide with the next page's rows.
    index += len(items)
wb.save(newTable)